import sys, os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')))
import json
from preprossData.utils.crop_padding import crop_resize_save

# Maps each class folder name (a sub-directory expected directly under the
# dataset root) to the free-text description that gather_data() stores as
# the 'diagnosis' -> 'text' annotation for every image found in that folder.
# Keys must match the on-disk folder names exactly: gather_data() raises
# ValueError when a listed folder is missing. The values are emitted verbatim
# into annotations.json, so editing a description changes the produced labels.
folder_to_description = {
    "0.0.Normal": "Normal orange-red fundus with red branched curving vasculature entering the pink optic disc with sharp margins and a cup-to-disc ratio of approximately 0.35",
    "0.1.Tessellated fundus": "Tessellated fundus: diffuse attenuation of the retinal pigment epithelium with visibility of large choroidal vessels",
    "0.2.Large optic cup": "Large optic cup: cup-to-disc ratio > 0.5, with a pink neuroretinal rim following the ISNT rule, without notching or bayoneting of vessels",
    "0.3.DR1": "Microaneurysms only (International Classification of Diabetic Retinopathy 2017)",
    "1.0.DR2": "Microaneurysms and other signs (dot and blot hemorrhages, hard exudates), less than severe nonproliferative Diabetic Retinopathy and/or with diabetic macular edema",
    "1.1.DR3": "Severe nonproliferative Diabetic Retinopathy and proliferative Diabetic Retinopathy (neovascularization, vitreous/preretinal hemorrhage)",
    "2.0.BRVO": "Tortuosity and dilatation of affected veins, with dot, blot, and flame hemorrhages, sometimes accompanied by cotton-wool spots or hard exudates (Branch Retinal Vein Occlusion)",
    "2.1.CRVO": "Tortuosity and dilatation of all venous branches, with dot, blot, and flame hemorrhages, sometimes accompanied by cotton-wool spots or hard exudates (Central Retinal Vein Occlusion)",
    "3.RAO": "Attenuation of arteries and veins, with a cherry red fovea contrasting the cloudy white edematous retina due to arterial occlusion (Retinal Artery Occlusion)",
    "4.Rhegmatogenous RD": "Slightly opaque, convex or corrugated appearance of the elevated retina, sometimes with visible retinal breaks (Retinal Detachment)",
    "5.0.CSCR": "Round or oval retinal elevation with clear or turbid fluid underneath, sometimes with depigmented retinal pigment epithelium foci or small patches of atrophy/hyperplasia (Central Serous Chorioretinopathy)",
    "5.1.VKH disease": "Circumscribed retinal edema, multiple exudative retinal detachments of the posterior retina, often with optic disc hyperemia and edema, and with slight radial folds during resolution (Vogt-Koyanagi-Harada disease)",
    "6.Maculopathy": "Lesions within the macular area, such as intermediate Age-related Macular Degeneration (drusen >125 μm), neovascular Age-related Macular Degeneration, retinal angiomatous proliferation, polypoidal choroidal vasculopathy, choroidal neovascularization, idiopathic macular telangiectasia, and macular atrophy (not due to other listed diseases)",
    "7.ERM": "Epiretinal membrane presenting as a cellophane-like sheen on or above the retinal surface, often with macular pucker and distortion of vascular architecture",
    "8.MH": "Macular hole with a central foveal defect, round or oval in shape, possibly with multiple yellow deposits within the crater or a cuff of subretinal fluid",
    "9.Pathological myopia": "Tessellated fundus with focal chorioretinal atrophy, Fuchs spot, lacquer cracks, choroidal neovascularization or subretinal hemorrhage",
    "10.0.Possible glaucoma": "Large cup-to-disc ratio with cup excavation, thinning of the neuroretinal rim, notching and bayoneting of vessels with retinal nerve fiber layer defects, disc hemorrhages, baring of circumlinear vessels, laminar dot sign, and peripapillary atrophy",
    "10.1.Optic atrophy": "White optic disc with reduction of small vessels on the disc, attenuation of peripapillary vessels, and thinning of the retinal nerve fiber layer, sometimes with Paton lines",
    "11.Severe hypertensive retinopathy": "Cotton-wool spots, arteriolar narrowing, arteriolosclerosis, flame-shaped hemorrhages, retinal edema, macular star formation, and disc edema",
    "12.Disc swelling and elevation": "Disc hyperemia with elevation of indistinct disc margins, sometimes accompanied by peripapillary flame hemorrhages and cotton-wool spots",
    "13.Dragged Disc": "Temporal vascular straightening with a retinal fold or vitreous bands extending from the peripheral retina to the disc",
    "14.Congenital disc abnormality": "Includes optic disc coloboma, morning glory anomaly, optic disc pit, megalopapilla, and hypoplastic disc",
    "15.0.Retinitis pigmentosa": "Mid-peripheral retinal pigment epithelium atrophy with bone-spicule perivascular pigmentation, arteriolar attenuation, and waxy pallor of the optic disc",
    "15.1.Bietti crystalline dystrophy": "Numerous fine, glistening yellow-white crystals with atrophy of the retinal pigment epithelium and choriocapillaris, while the optic disc and retinal vasculature remain normal",
    "16.Peripheral retinal degeneration and break": "Lattice, snailtrack, pavingstone, honeycomb patterns, peripheral drusen, microcystoid changes and white-without pressure areas, sometimes with retinal breaks",
    "17.Myelinated nerve fiber": "Whitish striated patches with feathery borders that obscure retinal vessels",
    "18.Vitreous particles": "Including asteroid hyalosis, synchysis scintillans, and deposits seen in familial amyloidosis",
    "19.Fundus neoplasm": "Slightly elevated, dome- or mushroom-shaped mass of various colors",
    "20.Massive hard exudates": "Waxy yellow lesions with distinct margins arranged in large clumps, typically due to vessel abnormalities",
    "21.Yellow-white spots-flecks": "Multiple, discrete, yellow-white round dots or fleck lesions, as seen in early Age-related Macular Degeneration (drusen <125 μm)",
    "22.Cotton-wool spots": "Small, whitish, fluffy superficial lesions in the post-equatorial fundus",
    "23.Vessel tortuosity": "Tortuous and sometimes dilated arteries and veins, either localized or diffuse",
    "24.Chorioretinal atrophy-coloboma": "Focal or extensive atrophy of the retinal pigment epithelium and choroid, or a coloboma",
    "25.Preretinal hemorrhage": "A usually round red lesion that obscures underlying retinal landmarks, sometimes with a boat-shaped crescent configuration; hemorrhage may extend into the vitreous",
    "26.Fibrosis": "Irregular greyish-white opacification often accompanied by distortion of retinal vessels that may cross vessel arches",
    "27.Laser Spots": "Multiple, uniform, round, discrete yellow-white or brown lesions caused by photocoagulation",
    "28.Silicon oil in eye": "Shiny reflection from the retina-oil interface",
    "29.0.Blur fundus without PDR": "Blurred retinal landmarks due to severe lens opacities, vitreous opacities, or hemorrhage, without signs of proliferative Diabetic Retinopathy",
    "29.1.Blur fundus with suspected PDR": "Blurred retinal landmarks with suspected features of proliferative Diabetic Retinopathy"
}

def gather_data(data_path: str, tar_path: str, prefix: str = 'jsiec') -> dict:
    """Preprocess a folder-per-class image dataset and write its annotations.

    Each key of ``folder_to_description`` names a sub-folder of ``data_path``
    holding the images of one class; the mapped description becomes the
    ``diagnosis`` text of every image in that folder. Each image is passed to
    ``crop_resize_save`` (resize=(224, 224), crop_threshold=25) and written to
    ``tar_path/images/{prefix}_{original stem}.png``. All annotations are then
    dumped to ``tar_path/annotations.json``.

    Args:
        data_path: Dataset root containing every folder listed in
            ``folder_to_description``.
        tar_path: Output directory; created if it does not exist.
        prefix: Prefix prepended to every output image name.

    Returns:
        Dict keyed by output image name. Each value holds ``image_name``,
        ``image_path`` (relative to ``tar_path``), ``original_path`` (relative
        to ``data_path``), ``crop_info`` (whatever ``crop_resize_save``
        returned) and ``diagnosis`` (``{'text': ...}``).

    Raises:
        ValueError: If an expected class folder is missing or is not a
            directory, or if two source images would be written to the same
            output name (same stem in different folders, or same stem with
            different extensions).
    """
    # Only files with these (case-insensitive) extensions are treated as images.
    image_suffixes = ('.png', '.jpg', '.jpeg', '.bmp', '.ppm')

    os.makedirs(tar_path, exist_ok=True)
    images_dir = os.path.join(tar_path, 'images')
    os.makedirs(images_dir, exist_ok=True)

    data_dict = {}

    # Walk every class folder declared in folder_to_description.
    for folder_name, diagnosis_text in folder_to_description.items():
        folder_path = os.path.join(data_path, folder_name)
        # isdir (not exists): a plain file at this path must be rejected here
        # instead of failing later inside os.listdir with an unrelated error.
        if not os.path.isdir(folder_path):
            raise ValueError(f"Folder {folder_path} does not exist.")

        # Sorted so that processing order (and the JSON key order) is
        # deterministic regardless of the filesystem's listing order.
        for image_file in sorted(os.listdir(folder_path)):
            if not image_file.lower().endswith(image_suffixes):
                continue
            src_image_path = os.path.join(folder_path, image_file)

            # Keep the original stem; only add the prefix and force ".png".
            base_name = os.path.splitext(image_file)[0]
            new_image_name = f"{prefix}_{base_name}.png"

            # Output names drop both the class folder and the original
            # extension, so distinct sources can map to one name. Writing the
            # second one would silently overwrite the first image and its
            # annotation, so stop before touching the file on disk.
            if new_image_name in data_dict:
                earlier_source = data_dict[new_image_name]['original_path']
                raise ValueError(
                    f"Output name collision: {src_image_path} and "
                    f"{earlier_source} both map to {new_image_name}."
                )

            dest_image_path = os.path.join(images_dir, new_image_name)

            # Crop, resize and save as PNG; the helper's return value is kept
            # as-is in the annotation (it must be JSON-serializable for the
            # dump below to succeed).
            crop_info = crop_resize_save(
                image_path=src_image_path,
                save_path=dest_image_path,
                resize=(224, 224),
                crop_threshold=25
            )

            data_dict[new_image_name] = {
                'image_name': new_image_name,
                'image_path': os.path.join('images', new_image_name),
                'original_path': os.path.relpath(src_image_path, data_path),
                'crop_info': crop_info,
                'diagnosis': {
                    'text': diagnosis_text
                }
            }

    # Persist all annotations to tar_path/annotations.json.
    annotations_path = os.path.join(tar_path, 'annotations.json')
    with open(annotations_path, 'w', encoding='utf-8') as f:
        json.dump(data_dict, f, indent=4)

    return data_dict

if __name__ == "__main__":
    # Script entry point: read the three path/prefix options from the command
    # line, run the preprocessing once, and report completion.
    import argparse

    cli = argparse.ArgumentParser(description="新数据集预处理")
    cli.add_argument(
        "--data_path",
        type=str,
        default="../Dataset/new_dataset",
        help="新数据集根目录，要求其下包含类别文件夹",
    )
    cli.add_argument(
        "--tar_path",
        type=str,
        default="../Dataset/processed224_new",
        help="预处理后数据存放目录",
    )
    cli.add_argument(
        "--prefix",
        type=str,
        default="jsiec",
        help="处理后图片名称的前缀，默认为 'jsiec'",
    )
    args = cli.parse_args()

    # The returned dict is kept under the same module-level name as before.
    annotations = gather_data(
        args.data_path,
        args.tar_path,
        prefix=args.prefix,
    )
    print("Preprocessing completed. Annotations saved.")
